# Required Packages
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import pandas_datareader.data as pdr
from datetime import datetime
import math
# Progress Bar
from IPython.core.display import Image, display
import progressbar
# Plots
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
# sns setting
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
# plt setting
sns.set_style('whitegrid')
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
In this article, the Technology Services Sector data from Yahoo! Finance is used, and we analyze the current top tech companies' stock prices.
| Symbol | Name |
|---|---|
| AAPL | Apple Inc. |
| MSFT | Microsoft Corporation |
| TSM | Taiwan Semiconductor Manufacturing Company Limited |
| INTC | Intel Corporation |
| CSCO | Cisco Systems, Inc. |
| ORCL | Oracle Corporation |
| SAP | SAP SE |
| ADBE | Adobe Inc. |
| CRM | salesforce.com, inc. |
| NVDA | NVIDIA Corporation |
| ACN | Accenture plc |
| AVGO | Broadcom Inc. |
| IBM | International Business Machines Corporation |
| ASML | ASML Holding N.V. |
| TXN | Texas Instruments Incorporated |
| QCOM | QUALCOMM Incorporated |
| FIS | Fidelity National Information Services, Inc. |
| SNE | Sony Corporation |
| INTU | Intuit Inc. |
| VMW | VMware, Inc. |
| AMAT | Applied Materials, Inc. |
| MU | Micron Technology, Inc. |
| NOW | ServiceNow, Inc. |
| UBER | Uber Technologies, Inc. |
| AMD | Advanced Micro Devices, Inc. |
# Ticker symbol -> company name for every stock analysed in this notebook.
Tech_Dic = {
    'AAPL': 'Apple Inc.',
    'MSFT': 'Microsoft Corporation',
    'TSM': 'Taiwan Semiconductor Manufacturing Company Limited',
    'INTC': 'Intel Corporation',
    'CSCO': 'Cisco Systems, Inc.',
    'ORCL': 'Oracle Corporation',
    'SAP': 'SAP SE',
    'ADBE': 'Adobe Inc.',
    'CRM': 'salesforce.com, inc.',
    'NVDA': 'NVIDIA Corporation',
    'ACN': 'Accenture plc',
    'AVGO': 'Broadcom Inc.',
    'IBM': 'International Business Machines Corporation',
    'ASML': 'ASML Holding N.V.',
    'TXN': 'Texas Instruments Incorporated',
    'QCOM': 'QUALCOMM Incorporated',
    'FIS': 'Fidelity National Information Services, Inc.',
    'SNE': 'Sony Corporation',
    'INTU': 'Intuit Inc.',
    'VMW': 'VMware, Inc.',
    'AMAT': 'Applied Materials, Inc.',
    'MU': 'Micron Technology, Inc.',
    'NOW': 'ServiceNow, Inc.',
    'UBER': 'Uber Technologies, Inc.',
    'AMD': 'Advanced Micro Devices, Inc.',
}
# The symbols alone, in the dictionary's insertion order.
Tech_list = list(Tech_Dic)
The data is collected from a year before today until now.
# Download window: from midnight one year ago up to the current moment.
# The clock is read once so both bounds come from the same instant.
end = datetime.today()
try:
    start = datetime(end.year - 1, end.month, end.day)
except ValueError:
    # Today is 29 February and last year had no such day; use 28 February.
    start = datetime(end.year - 1, 2, 28)
start, end
Collecting data from Yahoo! Finance and creating moving averages over 10-, 20- and 60-day windows.
def Get_Data(Inp):
    """Download the price history of one symbol and add moving averages.

    Inp: ticker symbol passed to the 'yahoo' data reader.
    The request covers the module-level ``start`` .. ``end`` window.
    Returns the downloaded frame with a leading 'Symbol' column and one
    "Moving Ave. <n> days" column (rolling mean of 'Adj Close') for each
    of the 10, 20 and 60 day windows.
    """
    Out = pdr.DataReader(name=Inp, data_source='yahoo', start=start, end=end)
    # Tag every row so frames of several symbols can be stacked later.
    Out.insert(0, 'Symbol', Inp)
    adjusted_close = Out['Adj Close']
    for window in (10, 20, 60):
        Out["Moving Ave. %s days" % window] = adjusted_close.rolling(window=window, center=False).mean()
    return Out
# Download every symbol (one request per ticker) behind a progress bar.
Progress_Bar = progressbar.ProgressBar(maxval=len(Tech_list),
                                       widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
Progress_Bar.start()
Frames = []
for Counter, i in enumerate(Tech_list, start=1):
    Frames.append(Get_Data(i))
    Progress_Bar.update(Counter)
Progress_Bar.finish()
# Stack the per-symbol frames once; concatenating inside the loop would
# re-copy the growing frame on every iteration.
Data = pd.concat(Frames)
del Frames
Displaying today's data only:
# Keep only the rows whose index equals today's date (none if today has no prices).
Today = Data.loc[Data.index == datetime.today().strftime('%Y-%m-%d')].reset_index(drop=True)
Today.style.hide_index()
Consider Apple Inc. for example. We have,
def Disp_Data(Inp):
    """Return the rows of the module-level ``Data`` frame for one symbol.

    Inp: ticker symbol to select.
    The 'Symbol' column is dropped from the result because it would hold
    the same value on every row.
    """
    rows_for_symbol = Data.loc[Data['Symbol'] == Inp]
    return rows_for_symbol.drop(columns=['Symbol'])
# Apple's rows only, without the 'Symbol' column.
Temp = Disp_Data('AAPL')
# Summary statistics of every column.
Temp.describe()
# First rows; columns containing NaN there (the rolling means have not
# seen a full window yet) are dropped from this preview.
Temp.head().dropna(axis = 1)
# Most recent rows.
Temp.tail()
Let's plot the adjusted closing price of every stock under study.
def get_ylim(Inp, Var):
    """Return an upper axis limit a little above the largest value of ``Inp[Var]``.

    The maximum is rounded to one significant digit and half of its order
    of magnitude is added as head-room, e.g. a maximum of 123 gives
    100 + 100/2 = 150.0.

    Inp: DataFrame (or any mapping of columns) holding the data.
    Var: name of the column to inspect.
    Raises ValueError when the maximum is not a positive finite number.
    """
    # '.0e' yields text such as '1e+02': one significant digit plus exponent.
    rounded_text = format(Inp[Var].max(), '.0e')
    peak = float(rounded_text)
    if not 0 < peak < float('inf'):
        raise ValueError("get_ylim needs a positive finite maximum, got %r" % peak)
    # Reading the exponent from the text also works for maxima below 1,
    # where truncating to an integer would give 0 and divide by zero.
    magnitude = 10.0 ** int(rounded_text.split('e')[1])
    return peak + magnitude / 2
# One line per company: adjusted close over time, all on a shared axis.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 12))
for symbol in Tech_list:
    Data.loc[Data.Symbol == symbol, 'Adj Close'].plot(ax=ax, label=Tech_Dic[symbol])
# Legend placed to the right of the axes.
_ = ax.legend(loc='upper center', bbox_to_anchor=(1.2, 0.9), shadow=True, ncol=1, fontsize=12)
_ = ax.set_ylim([0, get_ylim(Data, 'Adj Close')])
We can create some new functions that can be beneficial for applications as well.
def List_Search(Inp, key):
    """Return the entries of ``Inp`` that contain ``key``, in their original order.

    Inp: a list of strings.
    key: the substring to look for.
    """
    # str.find returns -1 when the substring is absent, an index >= 0 otherwise.
    return [entry for entry in Inp if entry.find(key) >= 0]
def List_Diff(Inp_A, Inp_B):
    """Return the distinct items of ``Inp_A`` that do not occur in ``Inp_B``.

    Inp_A: a list of hashable items.
    Inp_B: a list of hashable items to exclude.
    The result keeps the order of first appearance in ``Inp_A``. A plain
    set difference returns the same items in arbitrary order, which makes
    anything built from the result (for example column order) vary between
    runs.
    """
    excluded = set(Inp_B)
    # dict.fromkeys removes duplicates while preserving insertion order.
    return list(dict.fromkeys(item for item in Inp_A if item not in excluded))
# Columns to average: everything except the moving averages and the symbol label.
Columns = List_Diff(Data.columns.tolist(), List_Search(Data.columns.tolist(), 'Moving Ave'))
Columns = List_Diff(Columns, ['Symbol'])
# One row per symbol and one (initially blank) 'Ave <column>' cell per averaged column.
Ave_df = pd.DataFrame({'Symbol': Tech_list})
for column in Columns:
    Ave_df['Ave ' + column] = ''
# Fill each row with that symbol's column means, showing progress as we go.
Progress_Bar = progressbar.ProgressBar(maxval=len(Tech_list),
                                       widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
Progress_Bar.start()
for Counter, symbol in enumerate(Tech_list, start=1):
    Progress_Bar.update(Counter)
    symbol_rows = Data[Data.Symbol == symbol]
    Ave_df.iloc[Counter - 1, 1:] = symbol_rows[Columns].mean().values
Progress_Bar.finish()
Ave_df.style.hide_index()
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(18, 14), sharex=False)
upper_axis, lower_axis = ax
# Upper panel: average traded volume per symbol.
Ave_df.plot.bar(x='Symbol', y='Ave Volume', rot=90, ax=upper_axis, legend=False, color='#34495e', edgecolor='k')
_ = upper_axis.set_ylim([0, get_ylim(Ave_df, 'Ave Volume')])
# Lower panel: every remaining average, grouped per symbol.
Temp = Ave_df.drop(columns=['Ave Volume'])
Temp.plot.bar(x='Symbol', rot=90, ax=lower_axis, legend=True, edgecolor='k')
_ = lower_axis.set_ylim([0, get_ylim(Ave_df, 'Ave High')])
# One legend column per plotted series (all columns of Temp except 'Symbol').
_ = lower_axis.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), shadow=True,
                      ncol=len(Temp.columns)-1, fontsize=12)
# Spacing between the two panels.
plt.subplots_adjust(hspace=0.3, wspace=0.2)
Let's plot moving averages for 10, 20 and 60 day periods of time for the top 4 companies with the highest volume on average.
def TopN_volumes(N, df=None):
    """Return the ``N`` entries of the first column with the largest 'Ave Volume'.

    N: number of entries to return.
    df: frame with an 'Ave Volume' column and the identifying value
        (the symbol) in its first column. When omitted, the module-level
        ``Ave_df`` is looked up at call time, so a rebuilt ``Ave_df`` is
        picked up instead of the object that existed when the function was
        defined.
    """
    if df is None:
        df = Ave_df
    ranked = df.sort_values(by='Ave Volume', ascending=False)
    return ranked.iloc[:N, 0].tolist()
# Consider the moving-average columns together with 'Adj Close', sorted by name.
Columns = List_Search(Data.columns.tolist(), 'Moving Ave')
Columns.append("Adj Close")
Columns = list(np.sort(Columns))
# Symbols of the N = 4 companies with the highest volume on average.
N = 4
mylist = TopN_volumes(N, df=Ave_df)
# Two panels per row; squeeze=False keeps ``ax`` two-dimensional even when
# N <= 2 yields a single row.
Rows = math.ceil(N / 2)
fig, ax = plt.subplots(nrows=Rows, ncols=2, figsize=(16, 6 * Rows), squeeze=False)
Panels = ax.ravel()
for panel, symbol in zip(Panels, mylist):
    Disp_Data(symbol)[Columns].plot(ax=panel, legend=True)
    _ = panel.set_title(Tech_Dic[symbol])
    _ = panel.legend(loc='upper center', bbox_to_anchor=(0.5, -0.25), shadow=True, ncol=2, fontsize=12)
# With an odd N the last panel has no symbol; hide it instead of indexing past mylist.
for panel in Panels[len(mylist):]:
    panel.set_visible(False)
plt.subplots_adjust(hspace=0.6, wspace=0.2)
Daily return can be calculated using the percentage change of the adjusted closing price.
# Daily return = relative change of the adjusted close from one row to the next.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 10))
for symbol in mylist:
    daily_return = Disp_Data(symbol)['Adj Close'].pct_change()
    daily_return.plot(ax=ax, label=Tech_Dic[symbol])
_ = ax.legend(loc='upper center', bbox_to_anchor=(1.15, 0.9), shadow=True, ncol=1, fontsize=12)
_ = ax.set_ylim([-0.15, 0.25])
_ = ax.set_title('Daily Return', fontsize=14)
First, we need to create a new data frame by reading the Adj Close column from all stock data under study. We have,
# A single request for the whole symbol list; only the 'Adj Close' part is kept.
# Later cells select columns of this frame by ticker (e.g. 'AAPL').
All_data = pdr.DataReader(Tech_list, 'yahoo', start, end)['Adj Close']
# Preview of the oldest and newest rows.
All_data.head()
All_data.tail()
The returns can be analyzed using the percentage change from the adj Close.
# Daily return of every symbol: relative change of the adjusted close per row.
All_returns = All_data.pct_change()
All_returns.tail()
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 10))
All_returns.plot(marker='*', legend=True, ax=ax)
_ = ax.set_ylim([-0.15, 0.25])
_ = ax.set_title('Daily Returns', fontsize=14)
# Legend labels are matched to the plotted lines by position, so build them
# from the frame's own column order rather than from the dictionary's order.
# Symbols missing from Tech_Dic fall back to the ticker itself.
_ = ax.legend([Tech_Dic.get(symbol, symbol) for symbol in All_returns.columns],
              loc='upper center', bbox_to_anchor=(1.2, 0.9), shadow=True, ncol=1, fontsize=12)
We can see that most high returns happen around the same time. However, this needs to be analyzed more carefully. Using a jointplot, we can investigate the correlation between different data. For example, we can plot a jointplot for Apple Inc. and Microsoft Corporation, and another for Apple Inc. and Advanced Micro Devices, Inc.:
# Scatter plus regression line of the daily returns of two symbols.
# x/y/data are passed by keyword and the figure size via ``height``:
# current seaborn no longer accepts them positionally or as ``size``.
_ = sns.jointplot(x='AAPL', y='MSFT', data=All_returns, kind='reg', space=0, height=6, ratio=4)
_ = sns.jointplot(x='AAPL', y='AMD', data=All_returns, kind='reg', space=0, height=6, ratio=4)
There seems to be a minor positive correlation within each pair. In particular, the returns of Apple Inc. and Microsoft Corporation are highly correlated.
Now, we can use the pairplot tool to visualize all.
# Returns of the 8 symbols with the highest average volume; rows with any
# missing value are removed.
Temp = All_returns[TopN_volumes(8, df=Ave_df)].dropna()
# Pairwise scatter plots with a kernel density estimate on the diagonal.
_ = sns.pairplot(Temp, diag_kind='kde')
Nonetheless, the correlation matrix and plot are always convenient to see numerical values for correlations.
# Correlation matrix of the returns selected above (the frame ``Temp``),
# shown as a table.
Cor_matrix = Temp.corr()
Cor_matrix
def Correlation_Plot (Df,Fig_Size):
    """Draw an annotated heat map of the lower triangle of ``Df.corr()``.

    Df: frame whose columns are correlated with each other.
    Fig_Size: width and height of the (square) figure.
    """
    Correlation_Matrix = Df.corr()
    # 1 marks a hidden cell: everything strictly above the diagonal, so the
    # diagonal and the lower triangle stay visible.
    mask = np.triu(np.ones(Correlation_Matrix.shape), k=1)
    Fig, ax = plt.subplots(figsize=(Fig_Size, Fig_Size))
    heatmap_options = dict(annot=True, square=True,
                           cmap=sns.color_palette("RdBu", n_colors=10),
                           linewidths=0.2, vmin=0, vmax=1, cbar_kws={"shrink": .5})
    sns.heatmap(Correlation_Matrix, ax=ax, mask=mask, **heatmap_options)
    # Move the bottom limit by +0.5 and the top limit by -0.5.
    # NOTE(review): presumably a workaround for clipped first/last rows - confirm.
    bottom, top = ax.get_ylim()
    _ = ax.set_ylim(bottom + 0.5, top - 0.5)

Correlation_Plot (Temp, 8)
Here, darker shades of blue represent a higher correlation.
def Risk_Plot(data):
    """Scatter each column's standard deviation (risk) against its mean (expected return).

    data: frame of returns with one column per symbol; every point is
          annotated with its column name.
    """
    expected = data.mean()
    risk = data.std()
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 6))
    _ = ax.scatter(expected, risk, s=25, color="#e74c3c")
    _ = ax.set_xlabel('Expected Return')
    _ = ax.set_ylabel('Risk')
    # Axis limits are the extremes rounded to one significant digit.
    # The built-in float replaces the np.float alias, which recent NumPy removed.
    # NOTE(review): rounding can place a limit inside the data range and
    # leave extreme points outside the visible area - confirm this is intended.
    _ = ax.set_xlim([float(format(expected.min(), '.0e')), float(format(expected.max(), '.0e'))])
    _ = ax.set_ylim([float(format(risk.min(), '.0e')), float(format(risk.max(), '.0e'))])
    # Adding annotations: label every point with its column name.
    for label, x, y in zip(data.columns, expected, risk):
        ax.annotate(label, xy=(x, y), xytext=(-50, 0), textcoords='offset points',
                    ha='right', va='bottom', arrowprops=dict(facecolor="#9b59b6", shrink=0.001))

Risk_Plot(All_returns)
The expected daily returns lie roughly between 0 and 0.002. We would like to identify a stock with high return and low risk!
# 5% empirical quantile of Apple's daily returns, and its size in percent.
qt = All_returns['AAPL'].quantile(0.05)
qt_pct = abs(qt) * 100
print(qt_pct)
# {0} is the raw quantile and {1} the loss in percent; using {0} for both
# would print the raw quantile where the percentage belongs.
print("""The 0.05 empirical quantile of daily returns is at {0:.2f}.
This means that with 95% confidence, the worst daily loss will not exceed {1:.2f}% (of the investment)."""
      .format(qt, qt_pct))
To predict future behaviors, we can implement the Monte Carlo method (also see this link and this link).
# Number of simulated steps: one per day of a year.
days = 365
# Delta t: the length of one step as a fraction of the year.
dt = 1/365
Defining a Monte Carlo function for the stock price.
def stock_monte_carlo(start_price, days, mu, sigma):
    """Simulate one random price path.

    start_price: price at step 0.
    days: number of entries in the returned path.
    mu, sigma: mean and standard deviation used for the random steps.
    The step length is read from the module-level ``dt``.
    Returns a NumPy array with ``days`` prices.
    """
    price = np.zeros(days)
    price[0] = start_price
    # These quantities do not change from step to step.
    drift = mu * dt
    shock_mean = mu * dt
    shock_scale = sigma * np.sqrt(dt)
    # NOTE(review): the shock is drawn with mean mu*dt and the drift mu*dt is
    # added on top, so the drift enters each step twice - confirm intended.
    for step in range(1, days):
        shock = np.random.normal(loc=shock_mean, scale=shock_scale)
        step_return = drift + shock
        # New price = old price + old price * (shock + drift)
        price[step] = price[step - 1] + (price[step - 1] * step_return)
    return price
def Monte_Carlo_Analysis(Inp, mu, sigma, N=1e2, days = days):
    """Plot ``N`` simulated price paths for one symbol.

    Inp: ticker symbol; its rows come from ``Disp_Data`` and its name from ``Tech_Dic``.
    mu, sigma: mean and standard deviation handed to ``stock_monte_carlo``.
    N: number of simulated paths (converted to int; default 100).
    days: length of every path.
    Returns the symbol's data frame.
    """
    # Get the data for the requested symbol.
    df = Disp_Data(Inp)
    # The last entry of the 'Open' column is the starting price; .iloc selects
    # by position, independent of the index labels.
    start_price = df['Open'].iloc[-1]
    # Output figure
    N = int(N)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
    # Honour N: draw exactly the requested number of paths.
    for _run in range(N):
        _ = ax.plot(stock_monte_carlo(start_price, days, mu, sigma))
    _ = ax.set_xlabel('Days')
    _ = ax.set_ylabel('Price')
    _ = ax.set_title('Monte Carlo Analysis for %s' % Tech_Dic[Inp])
    _ = ax.set_xlim([0, days])
    return df
def Final_price_distribution_simulations(Inp, mu, sigma, N= 1e4, days = days):
    """Run ``N`` simulations and collect the final price of each path.

    Inp: ticker symbol; its rows come from ``Disp_Data``.
    mu, sigma: mean and standard deviation handed to ``stock_monte_carlo``.
    N: number of simulations (converted to int; default 10000).
    days: length of every simulated path.
    Returns a NumPy array holding the last price of every simulated path.
    """
    # Get the data for the requested symbol.
    df = Disp_Data(Inp)
    # The last entry of the 'Open' column is the starting price; .iloc selects
    # by position, independent of the index labels.
    start_price = df['Open'].iloc[-1]
    # Simulations array
    N = int(N)
    simulations = np.zeros(N)
    # Progress bar
    Progress_Bar = progressbar.ProgressBar(maxval=N, widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
    Progress_Bar.start()
    for i in range(N):
        simulations[i] = stock_monte_carlo(start_price, days, mu, sigma)[days-1]
        Progress_Bar.update(i + 1)
    Progress_Bar.finish()
    return simulations
def Final_price_distribution_plot(simulations, Inp):
    """Plot a histogram of simulated final prices with summary figures.

    simulations: array of final prices (as returned by
                 ``Final_price_distribution_simulations``).
    Inp: ticker symbol; its rows come from ``Disp_Data`` and its name from ``Tech_Dic``.
    The title reports the module-level ``days``.
    """
    # Get the data for the requested symbol.
    df = Disp_Data(Inp)
    # The last entry of the 'Open' column is the starting price; .iloc selects
    # by position, independent of the index labels.
    start_price = df['Open'].iloc[-1]
    # Output figure
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
    # 1st percentile of the final prices: 99% of the simulated outcomes lie above it.
    q = np.percentile(simulations, 1)
    _ = ax.hist(simulations, bins='auto', color='#34495e')
    _ = plt.figtext(0.75, 0.80, s="Start price: $%.2f" % start_price, fontsize=12)
    _ = plt.figtext(0.75, 0.75, "Mean final price: $%.2f" % simulations.mean(), fontsize=12)
    # Value at risk: the gap between the starting price and that percentile.
    _ = plt.figtext(0.75, 0.70, "VaR(0.99): $%.2f" % (start_price - q,), fontsize=12)
    _ = plt.figtext(0.15, 0.665, "q(0.99): $%.2f" % q, fontsize=12)
    _ = ax.set_xlim()
    # Red vertical line at the percentile.
    _ = ax.axvline(x=q, linewidth=4, color='#e74c3c')
    _ = ax.set_title("Final price distribution for %s after %s days" % (Tech_Dic[Inp], days), weight='bold')
# Symbol to simulate.
Tech = 'AMD'
# Mean of the symbol's daily returns.
mu = All_returns.mean()[Tech]
# Standard deviation of the symbol's daily returns.
sigma = All_returns.std()[Tech]
# Analysis: plot sample paths, then simulate and plot the final-price distribution.
Monte_Carlo_Analysis(Tech, mu = mu, sigma = sigma)
Simulations = Final_price_distribution_simulations(Tech, mu = mu, sigma = sigma)
Final_price_distribution_plot(Simulations, Tech)
It seems that AMD's overall price is increasing!
# Symbol to simulate.
Tech = 'AAPL'
# Mean of the symbol's daily returns.
mu = All_returns.mean()[Tech]
# Standard deviation of the symbol's daily returns.
sigma = All_returns.std()[Tech]
# Analysis: plot sample paths, then simulate and plot the final-price distribution.
Monte_Carlo_Analysis(Tech, mu = mu, sigma = sigma)
Simulations = Final_price_distribution_simulations(Tech, mu = mu, sigma = sigma)
Final_price_distribution_plot(Simulations, Tech)
It seems that Apple's overall price is increasing!
# Symbol to simulate.
Tech = 'MSFT'
# Mean of the symbol's daily returns.
mu = All_returns.mean()[Tech]
# Standard deviation of the symbol's daily returns.
sigma = All_returns.std()[Tech]
# Analysis: plot sample paths, then simulate and plot the final-price distribution.
Monte_Carlo_Analysis(Tech, mu = mu, sigma = sigma)
Simulations = Final_price_distribution_simulations(Tech, mu = mu, sigma = sigma)
Final_price_distribution_plot(Simulations, Tech)
It seems that Microsoft's overall price is increasing!